In [ ]:
import sys
import logging
import os.path as op
In [ ]:
# Import the SeqProp class
from ssbio.protein.sequence.seqprop import SeqProp
In [ ]:
# Printing multiple outputs per cell:
# setting ast_node_interactivity to "all" on the InteractiveShell class asks IPython
# to display the value of every expression in a cell rather than only the last one.
# Later cells rely on this to show both a method's result and the filtered annotations.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
Set the logging level in `logger.setLevel(logging.<LEVEL_HERE>)` to specify how verbose you want the pipeline to be. The available levels, from least to most verbose, are listed below; DEBUG is the most verbose.
- CRITICAL
- ERROR
- WARNING
- INFO (default)
- DEBUG
In [ ]:
# Create the root logger and choose how chatty it should be.
# SET YOUR LOGGING LEVEL HERE (e.g. logging.DEBUG for the most verbose output) #
LOG_LEVEL = logging.INFO
logger = logging.getLogger()
logger.setLevel(LOG_LEVEL)
In [ ]:
# Other logger stuff for Jupyter notebooks:
# build the formatter first, then attach it to a stderr stream handler, and finally
# make that handler the only one installed on the logger.
formatter = logging.Formatter(
    '[%(asctime)s] [%(name)s] %(levelname)s: %(message)s',
    datefmt="%Y-%m-%d %H:%M",
)
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
logger.handlers = [handler]
In [ ]:
# SET IDS HERE
# PROTEIN_ID is the identifier given to the SeqProp object; PROTEIN_SEQ is the
# amino-acid sequence (one-letter codes) that every calculation below operates on.
PROTEIN_ID = "YIAJ_ECOLI"
PROTEIN_SEQ = "MGKEVMGKKENEMAQEKERPAGSQSLFRGLMLIEILSNYPNGCPLAHLSELAGLNKSTVHRLLQGLQSCGYVTTAPAAGSYRLTTKFIAVGQKALSSLNIIHIAAPHLEALNIATGETINFSSREDDHAILIYKLEPTTGMLRTRAYIGQHMPLYCSAMGKIYMAFGHPDYVKSYWESHQHEIQPLTRNTITELPAMFDELAHIRESGAAMDREENELGVSCIAVPVFDIHGRVPYAVSISLSTSRLKQVGEKNLLKPLRETAQAISNELGFTVRDDLGAIT"
In [ ]:
# Create the SeqProp object from the ID and sequence configured above
my_seq = SeqProp(
    seq=PROTEIN_SEQ,
    id=PROTEIN_ID,
)
In [ ]:
# Some property calculations need a FASTA file as input, so write the sequence
# to 'tmp.fasta' inside the system temporary directory (always rewritten).
import tempfile
ROOT_DIR = tempfile.gettempdir()
my_seq.write_fasta_file(
    outfile=op.join(ROOT_DIR, 'tmp.fasta'),
    force_rerun=True,
)
# Show the sequence_path attribute of the object
my_seq.sequence_path
A `SeqProp` object is simply an extension of the Biopython `SeqRecord` object. Global properties which describe or summarize the entire protein sequence are stored in the `annotations` attribute, while local residue-specific properties are stored in the `letter_annotations` attribute.
In [ ]:
# Global properties using the Biopython ProteinAnalysis module
my_seq.get_biopython_pepstats()
# Display only the annotations whose key ends in '-biop'
{
    name: value
    for name, value in my_seq.annotations.items()
    if name.endswith('-biop')
}
In [ ]:
# Global properties from the EMBOSS pepstats program
my_seq.get_emboss_pepstats()
# Display only the annotations whose key ends in '-pepstats'
{
    name: value
    for name, value in my_seq.annotations.items()
    if name.endswith('-pepstats')
}
In [ ]:
# Aggregation propensity - the predicted number of aggregation-prone segments on an unfolded protein sequence
# The email/password passed to the calculation are read from the AMYLPRED_EMAIL and
# AMYLPRED_PASSWORD environment variables when they are set, so personal credentials
# never have to be typed into (and saved with) the notebook. The values originally
# hard-coded here remain the fallback, so the cell still runs unchanged by default.
import os
amylpred_email = os.environ.get('AMYLPRED_EMAIL', 'nmih@ucsd.edu')
amylpred_password = os.environ.get('AMYLPRED_PASSWORD', 'ssbiotest')
my_seq.get_aggregation_propensity(outdir=ROOT_DIR, email=amylpred_email, password=amylpred_password, cutoff_v=5, cutoff_n=5, run_amylmuts=False)
# Display only the annotations whose key ends in '-amylpred'
{k:v for k,v in my_seq.annotations.items() if k.endswith('-amylpred')}
In [ ]:
# Kinetic folding rate - the predicted rate of folding for this protein sequence
# The secondary-structure class is passed to the calculation as `secstruct`.
secstruct_class = 'mixed'
my_seq.get_kinetic_folding_rate(
    secstruct=secstruct_class,
)
# Display only the annotations whose key ends in '-foldrate'
{
    name: value
    for name, value in my_seq.annotations.items()
    if name.endswith('-foldrate')
}
In [ ]:
# Thermostability - prediction of free energy of unfolding dG from protein sequence
# Stores (dG, Keq)
# Evaluated at three temperatures; each call is kept as its own top-level statement
# so the notebook displays any returned value exactly as before.
my_seq.get_thermostability(at_temp=32.0)
my_seq.get_thermostability(at_temp=37.0)
my_seq.get_thermostability(at_temp=42.0)
# Display only the annotations whose key starts with 'thermostability_'
{
    name: value
    for name, value in my_seq.annotations.items()
    if name.startswith('thermostability_')
}